# imports
import pandas as pd
import plotly.express as px
import plotly.offline as pyo
# Set notebook mode to work in offline
pyo.init_notebook_mode()
# reading the CSV files
deal_dim = pd.read_csv('../data/deal_dim.csv')
deal_fact = pd.read_csv('../data/deal_fact.csv')
# checking data types and number of rows
deal_dim.info()
deal_fact.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2994 entries, 0 to 2993 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 DEAL_ID 2994 non-null int64 1 COMPANY_ID_(DEAL) 2994 non-null object 2 DEAL_PIPELINE 2994 non-null object 3 BUSINESS_TYPE_(DEAL) 2994 non-null object 4 CURRENT_DEAL_STAGE 2994 non-null object 5 CUSTOMER_SEGMENT_(DEAL) 2993 non-null object 6 CURRENT_DEAL_AMOUNT 2903 non-null float64 7 OPPORTUNITY_DATE_(DEAL) 2994 non-null object 8 BUSINESS_CASE_PROPOSAL_DATE 1752 non-null object 9 NEGOTIATION_DATE 1122 non-null object 10 VERBAL_AGREEMENT_DATE 522 non-null object 11 BOOKING_DATE 647 non-null object 12 CLOSE_DATE 2900 non-null object 13 CLOSED_LOST_DATE 1805 non-null object 14 SALES_CYCLE 647 non-null float64 15 LEGAL_CYCLE 454 non-null float64 16 MARKET_(DEAL) 2994 non-null object 17 COUNTRY_(DEAL) 2972 non-null object 18 DEAL_TYPE 2994 non-null object 19 SOURCED_BY_(DEAL) 2987 non-null object 20 LOST_REASON 1788 non-null object 21 LOST_TO_COMPETITOR 1133 non-null object 22 FORECAST_PROBABILITY 2850 non-null float64 23 BOOKINGS_FORECAST 2903 non-null float64 dtypes: float64(5), int64(1), object(18) memory usage: 561.5+ KB <class 'pandas.core.frame.DataFrame'> RangeIndex: 7781 entries, 0 to 7780 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 DEAL_ID 7781 non-null int64 1 COMPANY_ID_NUMERIC_DEAL_DATE_FACT 7781 non-null object 2 DEAL_STAGE_DATE 7772 non-null object 3 OPPORTUNITY_(DEAL) 2974 non-null float64 4 OPPORTUNITY_AMOUNT 2883 non-null float64 5 BUSINESS_CASE_PROPOSAL 1747 non-null float64 6 BUSINESS_CASE_PROPOSAL_AMOUNT 1747 non-null float64 7 DEAL_STAGE_NEGOTIATION_DATE 1121 non-null object 8 NEGOTIATION 1121 non-null float64 9 NEGOTIATION_AMOUNT 1121 non-null float64 10 DEAL_STAGE_VERBAL_AGREEMENT_DATE 522 non-null object 11 VERBAL_AGREEMENT 522 non-null float64 12 VERBAL_AGREEMENT_AMOUNT 522 non-null float64 13 DEAL_STAGE_CLOSED_LOST_DATE 1785 non-null object 14 CLOSED_LOST 1785 non-null float64 15 CLOSED_LOST_AMOUNT 1785 non-null float64 16 DEAL_ID_BOOKING_FACT 647 non-null float64 17 BOOKING 647 non-null float64 18 BOOKING_AMOUNT 647 non-null float64 dtypes: float64(13), int64(1), object(5) memory usage: 1.1+ MB
# i) Number of opportunities over time (monthly)
deal_dim['OPPORTUNITY_MONTH'] = pd.to_datetime(deal_dim['OPPORTUNITY_DATE_(DEAL)']).dt.strftime("%Y-%m")
number_of_opportunities_per_month = deal_dim.groupby('OPPORTUNITY_MONTH')['DEAL_ID'].count()
# plotting the graph
fig = px.line(number_of_opportunities_per_month, title='Number of opportunities per month',
labels={
'value': 'Number of opportunities',
'OPPORTUNITY_MONTH': 'Month'
}
)
fig.update_layout(showlegend=False)
pyo.iplot(fig, filename = 'number_of_opportunities_per_month')
# ii) Total opportunity amount and booking amount over time on a monthly basis in one graph.
# I used BUSINESS_CASE_PROPOSAL_AMOUNT instead of OPPORTUNITY_AMOUNT because data was missing
deal_fact['DEAL_STAGE_MONTH'] = pd.to_datetime(deal_fact['DEAL_STAGE_DATE']).dt.strftime("%Y-%m")
total_amount_per_month = deal_fact.groupby('DEAL_STAGE_MONTH')['BUSINESS_CASE_PROPOSAL_AMOUNT','BOOKING_AMOUNT'].sum()
# plotting the graph
fig = px.line(total_amount_per_month, title='Total amount per month',
labels={
'value': 'Total Amount $',
'DEAL_STAGE_MONTH': 'Month'
}
)
pyo.iplot(fig, filename = 'total_amount_per_month')
/var/folders/wf/z578m42550bgsqj4zdbtlxjw0000gn/T/ipykernel_37836/2921940191.py:4: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
# iii) Average Sales cycle time over time
avg_sales_cycle_per_month = deal_dim.groupby('OPPORTUNITY_MONTH')['SALES_CYCLE'].mean()
# plotting the graph
fig = px.line(avg_sales_cycle_per_month, title='Average sales cycle per month',
labels={
'value': 'Avg. Sales cycle',
'OPPORTUNITY_MONTH': 'Month'
}
)
fig.update_layout(showlegend=False)
pyo.iplot(fig, filename = 'avg_sales_cycle_per_month')